Using Advanced Search to Bulk Export
Sample Script
The following Python script illustrates how to execute a search that spans multiple pages and save the results to a CSV file.
- """
- Example script to run search profile (Assetic.SearchGetAPI.py)
- Search is paginated so set page size. On a per page basic the search
- results omit columns where all records are null for that column. This is
- catered for by building a list of columns returned by all pages
- """
- import assetic
- import csv
- import functools
- import sys
- # Assetic SDK instance
- asseticsdk = assetic.AsseticSDK("c:/users/you/assetic.ini",None,"Info")
- # Search API
- sapi = assetic.SearchApi()
- # Define search parameters
- searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
- numpagesize = 50
- # Return only those records where Asset Class = "Transport"
- searchfilter = "ComplexAssetClass=Transport"
- # set search criteria as keyword args
- kw = {"request_params_id":searchguid,
- "request_params_page":1,
- "request_params_sorts":"ComplexAssetName-desc",
- "request_params_page_size":numpagesize,
- "request_params_filters":searchfilter}
- # Get first page of results
- try:
- sg = sapi.search_get(**kw)
- except assetic.rest.ApiException as e:
- asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
- e.status, e.reason, e.body))
- # Log the number of records
- totalresults=sg.get("TotalResults")
- numpages = sg.get("TotalPages")
- asseticsdk.logger.info("Total Results: {0}, Total Pages: {1}".format(
- totalresults,numpages))
- # get data from nested output
- resourcelist = sg.get("ResourceList")
- resource = resourcelist[0]
- data = resource.get("Data")
- # Copy as "alldata" because we will be appending to this
- alldata = data
- # Get a list of columns
- columns = map( lambda x: x.keys(), data )
- if sys.version_info < (3,0):
- columns = reduce( lambda x,y: x+y, columns )
- else:
- columns = functools.reduce( lambda x,y: x|y, columns )
- # Write list of columns to the "all" list as this may grow
- allcolumns = columns
- # Now loop through remaining pages
- if numpages > 1:
- for pagenum in range(2,int(numpages) + 1):
- # set page number to get
- kw["request_params_page"]=pagenum
- asseticsdk.logger.info("Page: {0}".format(kw["request_params_page"]))
- # Now get results for this page
- try:
- sg = sapi.search_get(**kw)
- except assetic.rest.ApiException as e:
- asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
- e.status, e.reason, e.body))
- # get actual data from nested output
- resourcelist = sg.get("ResourceList")
- resource = resourcelist[0]
- data = resource.get("Data")
- # Get column list for this page - there may be new columns
- columns = map(lambda x: x.keys(), data)
- if sys.version_info < (3, 0):
- columns = reduce(lambda x,y: x+y, columns)
- # Add new column list to "allcolumns", will get unique list later
- allcolumns = allcolumns + columns
- # append new data to "alldata"
- alldata = alldata + data
- else:
- columns = functools.reduce(lambda x,y: x|y, columns)
- # merge column list sets
- allcolumns = allcolumns | columns
- # append new data to "alldata"
- alldata.extend(data)
- if pagenum > 10:
- # catchall escape
- break
- if sys.version_info < (3, 0):
- # get unique list of columns
- columns = list(set(allcolumns))
- else:
- columns = allcolumns
- # create csv
- if sys.version_info < (3,0):
- with open( "c:/temp/road_dump.csv", "wb" ) as out_file:
- csv_w = csv.writer(out_file)
- csv_w.writerow(columns)
- for i_r in alldata:
- # map data to column list by key to avoid potential issues
- # with column order
- csv_w.writerow( map( lambda x: i_r.get( x, ""), columns))
- else:
- with open( "c:/temp/road_dump3.csv", "w", newline="") as out_file:
- csv_w = csv.writer(out_file)
- csv_w.writerow(columns)
- for i_r in alldata:
- # map data to column list by key to avoid potential issues
- # with column order
- csv_w.writerow(map(lambda x: i_r.get( x, ""), columns))
How it works
First the search parameters are defined. The searchguid is the unique ID of the search profile to be exported (refer to this article). A filter on Asset Class = "Transport" restricts the search results to only those records where the Asset Class is "Transport".
# Define search parameters
searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
numpagesize = 50
# Return only those records where Asset Class = "Transport"
searchfilter = "ComplexAssetClass=Transport"
The parameter "request_params_page" is set to 1 so that the first page of results is returned. The number of records per page is set by "request_params_page_size". The records are returned ordered by Asset Name in descending order, set by "request_params_sorts", and filtered by "request_params_filters".
# set search criteria as keyword args
kw = {"request_params_id": searchguid,
      "request_params_page": 1,
      "request_params_sorts": "ComplexAssetName-desc",
      "request_params_page_size": numpagesize,
      "request_params_filters": searchfilter}
The search is executed and the array of data is obtained from the nested response.
# Get first page of results
sg = sapi.search_get(**kw)
# get actual data from nested output
resourcelist = sg.get("ResourceList")
resource = resourcelist[0]
data = resource.get("Data")
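For orientation, a simplified sketch of the response shape is shown below. This is a hypothetical illustration, not the full payload: only the keys this script relies on are included, and the values and field names within "Data" are invented.

# Hypothetical, trimmed-down illustration of the search_get response.
# Only the keys used by this script are shown; the values are made up.
sg = {
    "TotalResults": 120,      # total records matched by the search profile
    "TotalPages": 3,          # at a page size of 50
    "ResourceList": [
        {
            "Data": [
                {"Asset Id": "RD0001", "Asset Name": "High St"},
                {"Asset Id": "RD0002", "Asset Name": "Low St"}
            ]
        }
    ]
}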
Each page may return a different set of fields, because columns where every record in that page is null are omitted. The following code block manages this by building a list of columns and holding an array of data (field name and value pairs).
# Copy as "alldata" because we will be appending to this
alldata = data
# Get a list of columns
columns = map(lambda x: x.keys(), data)
columns = functools.reduce(lambda x, y: x | y, columns)
# Write list of columns to the "all" list as this may grow
allcolumns = columns
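To see why the set union produces the complete column list, the following standalone snippet (Python 3, with made-up records unrelated to the Assetic API) merges the keys of two records where one record is missing a column:

import functools

# Made-up records: the second record omits the "Length" column
records = [
    {"Asset Id": "RD0001", "Asset Name": "High St", "Length": 420},
    {"Asset Id": "RD0002", "Asset Name": "Low St"}
]
# Union of the per-record key views gives every column seen across records
columns = functools.reduce(lambda x, y: x | y, map(lambda r: r.keys(), records))
print(sorted(columns))  # ['Asset Id', 'Asset Name', 'Length']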
Subsequent pages are then requested in a loop, with the total number of pages reported by the first request determining the number of iterations.
for pagenum in range(2, int(numpages) + 1):
    # set page number to get
    kw["request_params_page"] = pagenum
    asseticsdk.logger.info("Page: {0}".format(kw["request_params_page"]))
    # Now get results for this page
    try:
        sg = sapi.search_get(**kw)
    except assetic.rest.ApiException as e:
        asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
            e.status, e.reason, e.body))
The data array for each page is appended to the "alldata" array, and the column list "allcolumns" is updated to include any new columns introduced by the page.
# get actual data from nested output
resourcelist = sg.get("ResourceList")
resource = resourcelist[0]
data = resource.get("Data")
# append new data to "alldata"
alldata = alldata + data
# Get column list for this page - there may be new columns
columns = map(lambda x: x.keys(), data)
columns = functools.reduce(lambda x, y: x | y, columns)
# merge column list sets
allcolumns = allcolumns | columns
Using the unique list of columns and the key/value pairs in the data array, the CSV file is created. Looking up each value by column key ensures the data is written to the correct fields; if the pages returned varying numbers of fields and the data were simply written to file page by page, the field order would be incorrect.
# create csv
with open("c:/temp/road_dump.csv", "w", newline="") as out_file:
    csv_w = csv.writer(out_file)
    csv_w.writerow(columns)
    for i_r in alldata:
        # map data to column list by key to avoid potential issues with column order
        csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
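As a final illustration of why rows are written by key lookup, the short standalone example below (Python 3, made-up data, no Assetic API calls) shows a record with a missing column producing an empty cell under the correct header rather than shifting the remaining values:

import csv

columns = ["Asset Id", "Asset Name", "Length"]  # merged column list
alldata = [
    {"Asset Id": "RD0001", "Asset Name": "High St", "Length": 420},
    {"Asset Id": "RD0002", "Asset Name": "Low St"}  # no "Length" value
]
with open("example_dump.csv", "w", newline="") as out_file:
    csv_w = csv.writer(out_file)
    csv_w.writerow(columns)
    for i_r in alldata:
        # a missing key becomes an empty cell, so values stay under the right header
        csv_w.writerow([i_r.get(x, "") for x in columns])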